require(pacman)
pacman:: p_load(pacman, dplyr, GGally, ggplot2, ggrepel, patchwork, gifski, ggforce, ggthemes, maps, sf, concaveman, remotes, readxl, ggthemes, ggvis, httr, plotly, rmarkdown, extrafont, shiny, isoband, stringr, rio, tidyr, labeling, caret, jquerylib, farver, corrgram, caTools, cowplot, randomForest, RMariaDB, lubridate, zoo, scales, ggfortify, forecast, doParallel,e1071,C50,kknn)
The first step is to import the data and select only the features that apply to each phone.
###Original Data
# Read the labelled small-matrix CSV for each handset.
df_iphone  <- import("iphone_smallmatrix_labeled_8d.csv")
df_samsung <- import("galaxy_smallmatrix_labeled_9d.csv")
# Histogram of the sentiment label in each data set.
plot_ly(
  data = df_iphone,
  x = ~df_iphone$iphonesentiment,
  type = "histogram"
)
plot_ly(
  data = df_samsung,
  x = ~df_samsung$galaxysentiment,
  type = "histogram"
)
Where:
0: Sentiment Unclear
1: very negative
2: somewhat negative
3: neutral
4: somewhat positive
5: very positive
###Domain Expertise Feature Selection
# Hand-picked (domain expertise) columns for the iPhone data, plus the label.
df_iphoneDE <- select(
  df_iphone,
  ios,
  iphonecampos, iphonecamneg, iphonecamunc,
  iphonedispos, iphonedisneg, iphonedisunc,
  iphoneperpos, iphoneperneg, iphoneperunc,
  iosperpos, iosperneg, iosperunc,
  iphonesentiment
)
# Hand-picked (domain expertise) columns for the Samsung data, plus the label.
df_samsungDE <- select(
  df_samsung,
  googleandroid,
  samsungcampos, samsungcamneg, samsungcamunc,
  samsungdispos, samsungdisneg, samsungdisunc,
  samsungperpos, samsungperneg, samsungperunc,
  googleperpos, googleperneg, googleperunc,
  galaxysentiment
)
###Correlation Feature Selection
# Pairwise correlation matrices over every column of each data set.
Corrmatrix <- cor(df_iphone)
Corrmatrix2 <- cor(df_samsung)
# Kept for reference: caret's helper that lists highly correlated columns.
#findCorrelation(Corrmatrix, cutoff = .8, verbose = TRUE, names = TRUE)
#findCorrelation(Corrmatrix2, cutoff = .8, verbose = TRUE, names = TRUE)
# Correlation-based column subset for the iPhone data, plus the label.
df_iphoneCOR <- select(
  df_iphone,
  samsungdisneg, samsungperneg, samsungdispos, htcdisneg,
  googleperneg, googleperpos, samsungdisunc, samsungcamunc,
  htcperpos, nokiacamunc, nokiadisneg, nokiadispos,
  nokiaperunc, nokiacampos, nokiadisunc, nokiaperneg,
  nokiacamneg, iphonedisneg, iphonedispos, sonydispos,
  iosperunc, iosperneg, ios, htcphone,
  iphonesentiment
)
# Correlation-based column subset for the Samsung data, plus the label.
df_samsungCOR <- select(
  df_samsung,
  samsungdisneg, samsungperneg, samsungdispos, htcdisneg,
  googleperneg, googleperpos, samsungdisunc, samsungcamunc,
  htcperpos, nokiacamunc, nokiadisneg, nokiadispos,
  nokiaperunc, nokiacampos, nokiadisunc, nokiaperneg,
  nokiacamneg, iphonedisneg, iphonedispos, sonyperpos,
  iosperunc, iosperneg, sonydisneg, ios, htcphone,
  galaxysentiment
)
###NZV Feature Selection
Near zero variance feature selection
# Column positions that caret flags as having near-zero variance.
nzv_iphone <- nearZeroVar(df_iphone, saveMetrics = FALSE)
nzv_samsung <- nearZeroVar(df_samsung, saveMetrics = FALSE)
# Drop the flagged columns. When nothing is flagged the index vector is empty,
# and `df[, -integer(0)]` would select zero columns instead of all of them,
# so the data set is kept unchanged in that case. `drop = FALSE` keeps the
# result a data frame even if only one column survives.
df_iphoneNZV <- if (length(nzv_iphone) > 0) {
  df_iphone[, -nzv_iphone, drop = FALSE]
} else {
  df_iphone
}
df_samsungNZV <- if (length(nzv_samsung) > 0) {
  df_samsung[, -nzv_samsung, drop = FALSE]
} else {
  df_samsung
}
#str(df_iphoneNZV)
#str(df_samsungNZV)
Let’s sample the data before using RFE
# Fix the RNG so the row samples drawn below are reproducible.
set.seed(123)
# Draw up to 1000 rows without replacement from each data set. The size is
# capped at the number of rows so a smaller input does not make sample() fail,
# and seq_len() avoids the 1:0 trap on an empty data frame.
iphoneSample <- df_iphone[
  sample(seq_len(nrow(df_iphone)), min(1000, nrow(df_iphone)), replace = FALSE),
]
samsungSample <- df_samsung[
  sample(seq_len(nrow(df_samsung)), min(1000, nrow(df_samsung)), replace = FALSE),
]
Set up rfeControl with random forest functions, repeated cross-validation, and verbose output turned off
# RFE control: random-forest helper functions, repeated cross-validation
# (5 repeats), no progress output.
ctrl <- rfeControl(functions = rfFuncs, method = "repeatedcv", repeats = 5, verbose = FALSE)
#cl <- makeCluster(7)
#registerDoParallel(cl)
# Predictors are every column except the response. Selecting them by name
# (instead of the fixed positions 1:58) keeps the call correct if the column
# count or order of the input file changes.
iphone_predictors <- setdiff(names(iphoneSample), "iphonesentiment")
samsung_predictors <- setdiff(names(samsungSample), "galaxysentiment")
# Run RFE over every subset size from 1 to the number of predictors.
rfeResults_iphone <- rfe(
  iphoneSample[, iphone_predictors, drop = FALSE],
  iphoneSample$iphonesentiment,
  sizes = seq_along(iphone_predictors),
  rfeControl = ctrl
)
rfeResults_samsung <- rfe(
  samsungSample[, samsung_predictors, drop = FALSE],
  samsungSample$galaxysentiment,
  sizes = seq_along(samsung_predictors),
  rfeControl = ctrl
)
# Get results
rfeResults_iphone
Recursive feature selection
Outer resampling method: Cross-Validated (10 fold, repeated 5 times)
Resampling performance over subset size:
The top 5 variables (out of 20):
iphone, googleandroid, iphonedispos, iphonedisneg, samsunggalaxy
rfeResults_samsung
Recursive feature selection
Outer resampling method: Cross-Validated (10 fold, repeated 5 times)
Resampling performance over subset size:
The top 5 variables (out of 14):
iphone, googleandroid, samsunggalaxy, iphonedispos, iphonecamunc
# Resampling performance against subset size, drawn with grid and
# overplotted points/lines.
plot(rfeResults_iphone, type = c("g", "o"))
plot(rfeResults_samsung, type = c("g", "o"))
# iPhone: keep the predictors RFE selected, then re-attach the response.
df_iphoneRFE <- df_iphone[, predictors(rfeResults_iphone)]
df_iphoneRFE$iphonesentiment <- df_iphone$iphonesentiment
# Samsung: same construction with its own RFE result and response.
df_samsungRFE <- df_samsung[, predictors(rfeResults_samsung)]
df_samsungRFE$galaxysentiment <- df_samsung$galaxysentiment
df_iphone Original Data
df_samsung
df_iphoneDE Domain Expertise
df_samsungDE
df_iphoneCOR Correlation
df_samsungCOR
df_iphoneNZV Near zero variance feature selection
df_samsungNZV
df_iphoneRFE Recursive Feature Elimination
df_samsungRFE
# Convert the sentiment label to a factor in every data set so that caret
# treats the modelling task as classification.
# -- original data
df_iphone[["iphonesentiment"]] <- factor(df_iphone[["iphonesentiment"]])
df_samsung[["galaxysentiment"]] <- factor(df_samsung[["galaxysentiment"]])
# -- domain expertise
df_iphoneDE[["iphonesentiment"]] <- factor(df_iphoneDE[["iphonesentiment"]])
df_samsungDE[["galaxysentiment"]] <- factor(df_samsungDE[["galaxysentiment"]])
# -- correlation
df_iphoneCOR[["iphonesentiment"]] <- factor(df_iphoneCOR[["iphonesentiment"]])
df_samsungCOR[["galaxysentiment"]] <- factor(df_samsungCOR[["galaxysentiment"]])
# -- near-zero variance
df_iphoneNZV[["iphonesentiment"]] <- factor(df_iphoneNZV[["iphonesentiment"]])
df_samsungNZV[["galaxysentiment"]] <- factor(df_samsungNZV[["galaxysentiment"]])
# -- recursive feature elimination
df_iphoneRFE[["iphonesentiment"]] <- factor(df_iphoneRFE[["iphonesentiment"]])
df_samsungRFE[["galaxysentiment"]] <- factor(df_samsungRFE[["galaxysentiment"]])
Models are trained first on the data sets without feature selection and then on the feature-selected data sets. Algorithms: C5.0, Random Forest, SVM, and kknn.
# One seed for the whole sequence of partitions; the calls below must stay in
# this order to draw the same splits.
set.seed(123)
# iPhone: 70/30 train/test split for each data set variant.
# `inTrain` is reused as scratch storage for the training row indices.
inTrain <- createDataPartition(df_iphone[["iphonesentiment"]], p = 0.70, list = FALSE)
training_iphone <- df_iphone[inTrain, ]
testing_iphone  <- df_iphone[-inTrain, ]

inTrain <- createDataPartition(df_iphoneDE[["iphonesentiment"]], p = 0.70, list = FALSE)
training_iphoneDE <- df_iphoneDE[inTrain, ]
testing_iphoneDE  <- df_iphoneDE[-inTrain, ]

inTrain <- createDataPartition(df_iphoneCOR[["iphonesentiment"]], p = 0.70, list = FALSE)
training_iphoneCOR <- df_iphoneCOR[inTrain, ]
testing_iphoneCOR  <- df_iphoneCOR[-inTrain, ]

inTrain <- createDataPartition(df_iphoneNZV[["iphonesentiment"]], p = 0.70, list = FALSE)
training_iphoneNZV <- df_iphoneNZV[inTrain, ]
testing_iphoneNZV  <- df_iphoneNZV[-inTrain, ]

inTrain <- createDataPartition(df_iphoneRFE[["iphonesentiment"]], p = 0.70, list = FALSE)
training_iphoneRFE <- df_iphoneRFE[inTrain, ]
testing_iphoneRFE  <- df_iphoneRFE[-inTrain, ]
# Samsung: 70/30 train/test split for each data set variant.
# `inTrain` is reused as scratch storage for the training row indices.
inTrain <- createDataPartition(df_samsung[["galaxysentiment"]], p = 0.70, list = FALSE)
training_samsung <- df_samsung[inTrain, ]
testing_samsung  <- df_samsung[-inTrain, ]

inTrain <- createDataPartition(df_samsungDE[["galaxysentiment"]], p = 0.70, list = FALSE)
training_samsungDE <- df_samsungDE[inTrain, ]
testing_samsungDE  <- df_samsungDE[-inTrain, ]

inTrain <- createDataPartition(df_samsungCOR[["galaxysentiment"]], p = 0.70, list = FALSE)
training_samsungCOR <- df_samsungCOR[inTrain, ]
testing_samsungCOR  <- df_samsungCOR[-inTrain, ]

inTrain <- createDataPartition(df_samsungNZV[["galaxysentiment"]], p = 0.70, list = FALSE)
training_samsungNZV <- df_samsungNZV[inTrain, ]
testing_samsungNZV  <- df_samsungNZV[-inTrain, ]

inTrain <- createDataPartition(df_samsungRFE[["galaxysentiment"]], p = 0.70, list = FALSE)
training_samsungRFE <- df_samsungRFE[inTrain, ]
testing_samsungRFE  <- df_samsungRFE[-inTrain, ]
####Original Data Models
# Start a parallel backend for caret. The worker count is derived from the
# machine instead of a fixed 7: one core is left free, the count never exceeds
# the previous value of 7, and it falls back to a single worker when the core
# count cannot be determined.
available_cores <- parallel::detectCores()
worker_count <- if (is.na(available_cores)) {
  1L
} else {
  max(1L, min(7L, available_cores - 1L))
}
cl <- makeCluster(worker_count)
registerDoParallel(cl)
# Shared resampling scheme for every model: 5-fold CV repeated 3 times.
# NOTE: this overwrites the rfeControl object previously stored in `ctrl`.
ctrl <- trainControl(method = "repeatedcv", number = 5, repeats = 3)
# Random forest on the full (no feature selection) data sets.
RF_iphone_Original <- train(iphonesentiment~., data = training_iphone, method = "rf",trControl=ctrl, tuneLength = 1)
RF_samsung_Original <- train(galaxysentiment~., data = training_samsung, method = "rf",trControl=ctrl, tuneLength = 1)
# Linear SVM.
SVM_iphone_Original <- train(iphonesentiment~., data = training_iphone, method = "svmLinear",trControl=ctrl, tuneLength = 3)
SVM_samsung_Original <- train(galaxysentiment~., data = training_samsung, method = "svmLinear",trControl=ctrl, tuneLength = 3)
# C5.0.
C50_iphone_Original <- train(iphonesentiment~., data = training_iphone,method="C5.0",trControl=ctrl, tuneLength = 3)
C50_samsung_Original <- train(galaxysentiment~., data = training_samsung,method="C5.0",trControl=ctrl, tuneLength = 3)
# Weighted k-nearest neighbours (kknn).
kknn_iphone_Original<- train(iphonesentiment~., data = training_iphone, method = "kknn", trControl = ctrl, tuneLength = 3)
kknn_samsung_Original <- train(galaxysentiment~., data = training_samsung, method = "kknn", trControl = ctrl, tuneLength = 3)
###Predictions
# Score the held-out iPhone test set with each original-data model.
RF_pred_iphone   <- predict(RF_iphone_Original,   newdata = testing_iphone)
C50_pred_iphone  <- predict(C50_iphone_Original,  newdata = testing_iphone)
SVM_pred_iphone  <- predict(SVM_iphone_Original,  newdata = testing_iphone)
KKNN_pred_iphone <- predict(kknn_iphone_Original, newdata = testing_iphone)
# Score the held-out Samsung test set with each original-data model.
RF_pred_samsung   <- predict(RF_samsung_Original,   newdata = testing_samsung)
C50_pred_samsung  <- predict(C50_samsung_Original,  newdata = testing_samsung)
SVM_pred_samsung  <- predict(SVM_samsung_Original,  newdata = testing_samsung)
KKNN_pred_samsung <- predict(kknn_samsung_Original, newdata = testing_samsung)
# Confusion matrices for the random-forest predictions only.
cmRF_iphone <- confusionMatrix(
  RF_pred_iphone,
  testing_iphone$iphonesentiment
)
cmRF_samsung <- confusionMatrix(
  RF_pred_samsung,
  testing_samsung$galaxysentiment
)
cmRF_iphone
Confusion Matrix and Statistics
Reference
Prediction 0 1 2 3 4 5
0 374 0 1 0 5 7
1 0 0 0 0 0 0
2 0 0 17 0 0 0
3 2 0 0 233 3 2
4 2 0 1 1 128 2
5 210 117 117 122 295 2251
Overall Statistics
Accuracy : 0.772
95% CI : (0.7585, 0.7851)
No Information Rate : 0.5815
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.553
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity 0.63605 0.00000 0.12500 0.65449 0.29698 0.9951
Specificity 0.99606 1.00000 1.00000 0.99802 0.99827 0.4711
Pos Pred Value 0.96641 NaN 1.00000 0.97083 0.95522 0.7233
Neg Pred Value 0.93891 0.96992 0.96927 0.96630 0.91933 0.9859
Prevalence 0.15116 0.03008 0.03496 0.09152 0.11080 0.5815
Detection Rate 0.09614 0.00000 0.00437 0.05990 0.03290 0.5787
Detection Prevalence 0.09949 0.00000 0.00437 0.06170 0.03445 0.8000
Balanced Accuracy 0.81606 0.50000 0.56250 0.82626 0.64762 0.7331
cmRF_samsung
Confusion Matrix and Statistics
Reference
Prediction 0 1 2 3 4 5
0 349 2 1 3 5 30
1 0 0 1 0 1 0
2 0 0 21 1 1 2
3 3 2 1 152 1 9
4 5 1 0 3 117 9
5 151 109 111 193 300 2287
Overall Statistics
Accuracy : 0.7559
95% CI : (0.742, 0.7693)
No Information Rate : 0.6037
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.4991
Mcnemar's Test P-Value : < 2.2e-16
Statistics by Class:
Class: 0 Class: 1 Class: 2 Class: 3 Class: 4 Class: 5
Sensitivity 0.68701 0.0000000 0.155556 0.43182 0.27529 0.9786
Specificity 0.98781 0.9994677 0.998929 0.99545 0.99478 0.4368
Pos Pred Value 0.89487 0.0000000 0.840000 0.90476 0.86667 0.7258
Neg Pred Value 0.95432 0.9705350 0.970359 0.94599 0.91756 0.9306
Prevalence 0.13123 0.0294498 0.034875 0.09093 0.10979 0.6037
Detection Rate 0.09016 0.0000000 0.005425 0.03927 0.03022 0.5908
Detection Prevalence 0.10075 0.0005167 0.006458 0.04340 0.03487 0.8140
Balanced Accuracy 0.83741 0.4997338 0.577242 0.71364 0.63504 0.7077
# Collect the cross-validation resamples of all eight original-data models.
# Despite its name, this object holds both the iPhone and the Samsung models.
ModelData_samsung <- resamples(list(
  RF_iphone    = RF_iphone_Original,
  KKNN_iphone  = kknn_iphone_Original,
  C50_iphone   = C50_iphone_Original,
  SVM_iphone   = SVM_iphone_Original,
  RF_Samsung   = RF_samsung_Original,
  KKNN_Samsung = kknn_samsung_Original,
  C50_Samsung  = C50_samsung_Original,
  SVM_Samsung  = SVM_samsung_Original
))
summary(ModelData_samsung)
Call:
summary.resamples(object = ModelData_samsung)
Models: RF_iphone, KKNN_iphone, C50_iphone, SVM_iphone, RF_Samsung, KKNN_Samsung, C50_Samsung, SVM_Samsung
Number of resamples: 15
Accuracy
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
RF_iphone 0.7527533 0.7606602 0.7654185 0.7660104 0.7712639 0.7787562 0
KKNN_iphone 0.3113311 0.3242592 0.3335168 0.3299952 0.3366368 0.3432343 0
C50_iphone 0.7652893 0.7684471 0.7709251 0.7722489 0.7743530 0.7863436 0
SVM_iphone 0.7024202 0.7050400 0.7070485 0.7097144 0.7140111 0.7198679 0
RF_Samsung 0.7448810 0.7473711 0.7534549 0.7528009 0.7576009 0.7634052 0
KKNN_Samsung 0.7210847 0.7289081 0.7354732 0.7369065 0.7423279 0.7584301 0
C50_Samsung 0.7546961 0.7608516 0.7654867 0.7658571 0.7681676 0.7814056 0
SVM_Samsung 0.6810392 0.6950747 0.6981758 0.6984165 0.7019628 0.7127836 0
Kappa
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
RF_iphone 0.5099650 0.5294884 0.5381855 0.5402964 0.5513654 0.5701253 0
KKNN_iphone 0.1423081 0.1553953 0.1658034 0.1625916 0.1710681 0.1776423 0
C50_iphone 0.5433409 0.5486972 0.5545884 0.5573681 0.5614709 0.5893433 0
SVM_iphone 0.3948751 0.4006885 0.4095749 0.4151116 0.4254707 0.4402169 0
RF_Samsung 0.4717044 0.4778292 0.4932631 0.4914112 0.5003910 0.5170210 0
KKNN_Samsung 0.4653630 0.4835829 0.4903545 0.4915303 0.4968022 0.5196451 0
C50_Samsung 0.5050557 0.5204220 0.5258856 0.5307879 0.5377643 0.5712356 0
SVM_Samsung 0.3290404 0.3534472 0.3652867 0.3662064 0.3767817 0.3988142 0
###Feature Selected Data Set
# RFE feature selection: the same four learners, fitted on the RFE subsets
# with the shared resampling control `ctrl`.
RF_iphone_RFE <- train(
  iphonesentiment ~ ., data = training_iphoneRFE,
  method = "rf", trControl = ctrl, tuneLength = 1
)
RF_samsung_RFE <- train(
  galaxysentiment ~ ., data = training_samsungRFE,
  method = "rf", trControl = ctrl, tuneLength = 1
)
SVM_iphone_RFE <- train(
  iphonesentiment ~ ., data = training_iphoneRFE,
  method = "svmLinear", trControl = ctrl, tuneLength = 3
)
SVM_samsung_RFE <- train(
  galaxysentiment ~ ., data = training_samsungRFE,
  method = "svmLinear", trControl = ctrl, tuneLength = 3
)
C50_iphone_RFE <- train(
  iphonesentiment ~ ., data = training_iphoneRFE,
  method = "C5.0", trControl = ctrl, tuneLength = 3
)
C50_samsung_RFE <- train(
  galaxysentiment ~ ., data = training_samsungRFE,
  method = "C5.0", trControl = ctrl, tuneLength = 3
)
kknn_iphone_RFE <- train(
  iphonesentiment ~ ., data = training_iphoneRFE,
  method = "kknn", trControl = ctrl, tuneLength = 3
)
kknn_samsung_RFE <- train(
  galaxysentiment ~ ., data = training_samsungRFE,
  method = "kknn", trControl = ctrl, tuneLength = 3
)
# NZV feature selection: the same four learners, fitted on the
# near-zero-variance-filtered subsets with the shared control `ctrl`.
RF_iphone_NZV <- train(
  iphonesentiment ~ ., data = training_iphoneNZV,
  method = "rf", trControl = ctrl, tuneLength = 1
)
RF_samsung_NZV <- train(
  galaxysentiment ~ ., data = training_samsungNZV,
  method = "rf", trControl = ctrl, tuneLength = 1
)
SVM_iphone_NZV <- train(
  iphonesentiment ~ ., data = training_iphoneNZV,
  method = "svmLinear", trControl = ctrl, tuneLength = 3
)
SVM_samsung_NZV <- train(
  galaxysentiment ~ ., data = training_samsungNZV,
  method = "svmLinear", trControl = ctrl, tuneLength = 3
)
C50_iphone_NZV <- train(
  iphonesentiment ~ ., data = training_iphoneNZV,
  method = "C5.0", trControl = ctrl, tuneLength = 3
)
C50_samsung_NZV <- train(
  galaxysentiment ~ ., data = training_samsungNZV,
  method = "C5.0", trControl = ctrl, tuneLength = 3
)
kknn_iphone_NZV <- train(
  iphonesentiment ~ ., data = training_iphoneNZV,
  method = "kknn", trControl = ctrl, tuneLength = 3
)
kknn_samsung_NZV <- train(
  galaxysentiment ~ ., data = training_samsungNZV,
  method = "kknn", trControl = ctrl, tuneLength = 3
)
# COR feature selection: the same four learners, fitted on the
# correlation-based subsets with the shared control `ctrl`.
RF_iphone_COR <- train(
  iphonesentiment ~ ., data = training_iphoneCOR,
  method = "rf", trControl = ctrl, tuneLength = 1
)
RF_samsung_COR <- train(
  galaxysentiment ~ ., data = training_samsungCOR,
  method = "rf", trControl = ctrl, tuneLength = 1
)
SVM_iphone_COR <- train(
  iphonesentiment ~ ., data = training_iphoneCOR,
  method = "svmLinear", trControl = ctrl, tuneLength = 3
)
SVM_samsung_COR <- train(
  galaxysentiment ~ ., data = training_samsungCOR,
  method = "svmLinear", trControl = ctrl, tuneLength = 3
)
C50_iphone_COR <- train(
  iphonesentiment ~ ., data = training_iphoneCOR,
  method = "C5.0", trControl = ctrl, tuneLength = 3
)
C50_samsung_COR <- train(
  galaxysentiment ~ ., data = training_samsungCOR,
  method = "C5.0", trControl = ctrl, tuneLength = 3
)
kknn_iphone_COR <- train(
  iphonesentiment ~ ., data = training_iphoneCOR,
  method = "kknn", trControl = ctrl, tuneLength = 3
)
kknn_samsung_COR <- train(
  galaxysentiment ~ ., data = training_samsungCOR,
  method = "kknn", trControl = ctrl, tuneLength = 3
)
# DE feature selection: the same four learners, fitted on the
# domain-expertise subsets with the shared control `ctrl`.
RF_iphone_DE <- train(
  iphonesentiment ~ ., data = training_iphoneDE,
  method = "rf", trControl = ctrl, tuneLength = 1
)
RF_samsung_DE <- train(
  galaxysentiment ~ ., data = training_samsungDE,
  method = "rf", trControl = ctrl, tuneLength = 1
)
SVM_iphone_DE <- train(
  iphonesentiment ~ ., data = training_iphoneDE,
  method = "svmLinear", trControl = ctrl, tuneLength = 3
)
SVM_samsung_DE <- train(
  galaxysentiment ~ ., data = training_samsungDE,
  method = "svmLinear", trControl = ctrl, tuneLength = 3
)
C50_iphone_DE <- train(
  iphonesentiment ~ ., data = training_iphoneDE,
  method = "C5.0", trControl = ctrl, tuneLength = 3
)
C50_samsung_DE <- train(
  galaxysentiment ~ ., data = training_samsungDE,
  method = "C5.0", trControl = ctrl, tuneLength = 3
)
kknn_iphone_DE <- train(
  iphonesentiment ~ ., data = training_iphoneDE,
  method = "kknn", trControl = ctrl, tuneLength = 3
)
kknn_samsung_DE <- train(
  galaxysentiment ~ ., data = training_samsungDE,
  method = "kknn", trControl = ctrl, tuneLength = 3
)
# This is the last training run in the script, so release the worker
# processes started by makeCluster() and switch foreach back to the sequential
# backend; otherwise the workers stay alive until the R session ends.
# NOTE(review): move these two lines if more parallel work is added later.
parallel::stopCluster(cl)
foreach::registerDoSEQ()
###All Results
# Gather the resampling results of every iPhone model: four learners
# (RF, C5.0, SVM, kknn) across the five data set variants.
ModelData_All_iPhone <- resamples(list(
  # Random forest
  RF_iphone       = RF_iphone_Original,
  RF_iphone_DE    = RF_iphone_DE,
  RF_iphone_COR   = RF_iphone_COR,
  RF_iphone_NZV   = RF_iphone_NZV,
  RF_iphone_RFE   = RF_iphone_RFE,
  # C5.0
  C50_iphone      = C50_iphone_Original,
  C50_iphone_DE   = C50_iphone_DE,
  C50_iphone_COR  = C50_iphone_COR,
  C50_iphone_NZV  = C50_iphone_NZV,
  C50_iphone_RFE  = C50_iphone_RFE,
  # Linear SVM
  SVM_iphone      = SVM_iphone_Original,
  SVM_iphone_DE   = SVM_iphone_DE,
  SVM_iphone_COR  = SVM_iphone_COR,
  SVM_iphone_NZV  = SVM_iphone_NZV,
  SVM_iphone_RFE  = SVM_iphone_RFE,
  # kknn
  kknn_iphone     = kknn_iphone_Original,
  kknn_iphone_DE  = kknn_iphone_DE,
  kknn_iphone_COR = kknn_iphone_COR,
  kknn_iphone_NZV = kknn_iphone_NZV,
  kknn_iphone_RFE = kknn_iphone_RFE
))
# Gather the resampling results of every Samsung model: four learners
# (RF, C5.0, SVM, kknn) across the five data set variants.
ModelData_All_Samsung <- resamples(list(
  # Random forest
  RF_samsung       = RF_samsung_Original,
  RF_samsung_DE    = RF_samsung_DE,
  RF_samsung_COR   = RF_samsung_COR,
  RF_samsung_NZV   = RF_samsung_NZV,
  RF_samsung_RFE   = RF_samsung_RFE,
  # C5.0
  C50_samsung      = C50_samsung_Original,
  C50_samsung_DE   = C50_samsung_DE,
  C50_samsung_COR  = C50_samsung_COR,
  C50_samsung_NZV  = C50_samsung_NZV,
  C50_samsung_RFE  = C50_samsung_RFE,
  # Linear SVM
  SVM_samsung      = SVM_samsung_Original,
  SVM_samsung_DE   = SVM_samsung_DE,
  SVM_samsung_COR  = SVM_samsung_COR,
  SVM_samsung_NZV  = SVM_samsung_NZV,
  SVM_samsung_RFE  = SVM_samsung_RFE,
  # kknn
  kknn_samsung     = kknn_samsung_Original,
  kknn_samsung_DE  = kknn_samsung_DE,
  kknn_samsung_COR = kknn_samsung_COR,
  kknn_samsung_NZV = kknn_samsung_NZV,
  kknn_samsung_RFE = kknn_samsung_RFE
))
# Accuracy and Kappa distributions for all iPhone models.
summary(ModelData_All_iPhone)
Call:
summary.resamples(object = ModelData_All_iPhone)
Models: RF_iphone, RF_iphone_DE, RF_iphone_COR, RF_iphone_NZV, RF_iphone_RFE, C50_iphone, C50_iphone_DE, C50_iphone_COR, C50_iphone_NZV, C50_iphone_RFE, SVM_iphone, SVM_iphone_DE, SVM_iphone_COR, SVM_iphone_NZV, SVM_iphone_RFE, kknn_iphone, kknn_iphone_DE, kknn_iphone_COR, kknn_iphone_NZV, kknn_iphone_RFE
Number of resamples: 15
Accuracy
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
RF_iphone 0.7527533 0.7606602 0.7654185 0.7660104 0.7712639 0.7787562 0
RF_iphone_DE 0.6486784 0.6507981 0.6541850 0.6541534 0.6555614 0.6615215 0
RF_iphone_COR 0.6626307 0.6708869 0.6740088 0.6738604 0.6780407 0.6813671 0
RF_iphone_NZV 0.7443526 0.7539901 0.7577093 0.7583383 0.7643828 0.7688498 0
RF_iphone_RFE 0.7619571 0.7702883 0.7757576 0.7747099 0.7799500 0.7830396 0
C50_iphone 0.7652893 0.7684471 0.7709251 0.7722489 0.7743530 0.7863436 0
C50_iphone_DE 0.6439185 0.6465731 0.6507151 0.6512535 0.6540052 0.6674009 0
C50_iphone_COR 0.6567493 0.6688687 0.6721672 0.6707760 0.6749244 0.6784141 0
C50_iphone_NZV 0.7479362 0.7497247 0.7544053 0.7549275 0.7592292 0.7654185 0
C50_iphone_RFE 0.7538546 0.7693369 0.7721519 0.7722850 0.7783590 0.7830396 0
SVM_iphone 0.7024202 0.7050400 0.7070485 0.7097144 0.7140111 0.7198679 0
SVM_iphone_DE 0.5990099 0.6031353 0.6081453 0.6082783 0.6121045 0.6193619 0
SVM_iphone_COR 0.6505228 0.6549162 0.6565768 0.6570518 0.6584099 0.6659329 0
SVM_iphone_NZV 0.6688705 0.6799670 0.6831683 0.6837318 0.6875688 0.6953168 0
SVM_iphone_RFE 0.6875688 0.6958426 0.7046832 0.7018633 0.7070150 0.7140496 0
kknn_iphone 0.3113311 0.3242592 0.3335168 0.3299952 0.3366368 0.3432343 0
kknn_iphone_DE 0.2753304 0.2854394 0.2944414 0.2931132 0.2993385 0.3074807 0
kknn_iphone_COR 0.1922865 0.2007701 0.2031938 0.2038241 0.2069913 0.2124381 0
kknn_iphone_NZV 0.3036304 0.3117773 0.3193833 0.3203113 0.3297550 0.3357222 0
kknn_iphone_RFE 0.3238043 0.3314067 0.3346175 0.3387686 0.3461745 0.3595815 0
Kappa
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
RF_iphone 0.50996496 0.52948841 0.5381855 0.54029638 0.5513654 0.57012534 0
RF_iphone_DE 0.24203267 0.24881475 0.2536719 0.25685508 0.2598509 0.27716271 0
RF_iphone_COR 0.28609913 0.30843701 0.3159714 0.31690543 0.3287299 0.33501884 0
RF_iphone_NZV 0.49811202 0.51647615 0.5253460 0.52682907 0.5408159 0.54954349 0
RF_iphone_RFE 0.53493534 0.55188763 0.5617076 0.56200788 0.5737136 0.58070746 0
C50_iphone 0.54334094 0.54869724 0.5545884 0.55736812 0.5614709 0.58934333 0
C50_iphone_DE 0.23054282 0.23726041 0.2522310 0.25093415 0.2592462 0.29256443 0
C50_iphone_COR 0.28359364 0.30768331 0.3139249 0.31470938 0.3271151 0.33293018 0
C50_iphone_NZV 0.50231444 0.50868731 0.5170771 0.51931132 0.5270671 0.54365480 0
C50_iphone_RFE 0.51621618 0.54972052 0.5599136 0.55803825 0.5721061 0.58157193 0
SVM_iphone 0.39487506 0.40068848 0.4095749 0.41511164 0.4254707 0.44021686 0
SVM_iphone_DE 0.07506227 0.08931142 0.1039479 0.10263402 0.1139134 0.14361741 0
SVM_iphone_COR 0.25041839 0.26252328 0.2673713 0.26839347 0.2727810 0.28999175 0
SVM_iphone_NZV 0.31532691 0.34450251 0.3493697 0.35058385 0.3607934 0.38019645 0
SVM_iphone_RFE 0.36422870 0.39101839 0.4074289 0.40244334 0.4143686 0.42962060 0
kknn_iphone 0.14230806 0.15539530 0.1658034 0.16259161 0.1710681 0.17764234 0
kknn_iphone_DE 0.09441844 0.10780845 0.1215946 0.11646001 0.1233551 0.13232911 0
kknn_iphone_COR 0.04231185 0.05131120 0.0533050 0.05543538 0.0597170 0.06968645 0
kknn_iphone_NZV 0.12367763 0.13809726 0.1432977 0.14455215 0.1521444 0.16060356 0
kknn_iphone_RFE 0.15560938 0.16278367 0.1686654 0.17199882 0.1806159 0.19858417 0
summary(ModelData_All_Samsung)
Call:
summary.resamples(object = ModelData_All_Samsung)
Models: RF_samsung, RF_samsung_DE, RF_samsung_COR, RF_samsung_NZV, RF_samsung_RFE, C50_samsung, C50_samsung_DE, C50_samsung_COR, C50_samsung_NZV, C50_samsung_RFE, SVM_samsung, SVM_samsung_DE, SVM_samsung_COR, SVM_samsung_NZV, SVM_samsung_RFE, kknn_samsung, kknn_samsung_DE, kknn_samsung_COR, kknn_samsung_NZV, kknn_samsung_RFE
Number of resamples: 15
Accuracy
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
RF_samsung 0.7448810 0.7473711 0.7534549 0.7528009 0.7576009 0.7634052 0
RF_samsung_DE 0.6192584 0.6221669 0.6229961 0.6247413 0.6272125 0.6334992 0
RF_samsung_COR 0.6727474 0.6827978 0.6875000 0.6867631 0.6912861 0.6954596 0
RF_samsung_NZV 0.7466814 0.7513819 0.7570559 0.7561211 0.7590597 0.7686774 0
RF_samsung_RFE 0.7497238 0.7555309 0.7614831 0.7615425 0.7670172 0.7759956 0
C50_samsung 0.7546961 0.7608516 0.7654867 0.7658571 0.7681676 0.7814056 0
C50_samsung_DE 0.6196013 0.6205751 0.6232044 0.6233044 0.6239978 0.6325401 0
C50_samsung_COR 0.6751522 0.6813157 0.6838317 0.6836665 0.6873099 0.6913717 0
C50_samsung_NZV 0.7415606 0.7482014 0.7516593 0.7518426 0.7553889 0.7627212 0
C50_samsung_RFE 0.7504151 0.7564271 0.7606412 0.7608750 0.7629323 0.7795580 0
SVM_samsung 0.6810392 0.6950747 0.6981758 0.6984165 0.7019628 0.7127836 0
SVM_samsung_DE 0.6209945 0.6218534 0.6241017 0.6241518 0.6261062 0.6277655 0
SVM_samsung_COR 0.6624239 0.6700774 0.6709071 0.6717168 0.6737626 0.6815920 0
SVM_samsung_NZV 0.6718318 0.6759817 0.6790055 0.6794974 0.6828869 0.6882255 0
SVM_samsung_RFE 0.6742257 0.6829741 0.6858407 0.6851397 0.6874135 0.6976230 0
kknn_samsung 0.7210847 0.7289081 0.7354732 0.7369065 0.7423279 0.7584301 0
kknn_samsung_DE 0.1024363 0.1065300 0.1072416 0.1790937 0.1238697 0.6209186 0
kknn_samsung_COR 0.2472345 0.6334997 0.6426991 0.5953944 0.6586445 0.6738530 0
kknn_samsung_NZV 0.3861878 0.6816786 0.7232983 0.6628759 0.7423914 0.7508306 0
kknn_samsung_RFE 0.4327615 0.5907724 0.7302377 0.6576406 0.7366528 0.7472345 0
Kappa
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
RF_samsung 0.471704443 0.47782923 0.49326312 0.49141116 0.50039096 0.51702103 0
RF_samsung_DE 0.080102288 0.08973244 0.09277634 0.09933491 0.11055355 0.12675442 0
RF_samsung_COR 0.279823757 0.30589920 0.31570433 0.31410672 0.32615722 0.33523363 0
RF_samsung_NZV 0.484395520 0.49293198 0.50646251 0.50561755 0.51350313 0.53553756 0
RF_samsung_RFE 0.497402822 0.50453513 0.51863941 0.52068620 0.53366370 0.55353452 0
C50_samsung 0.505055743 0.52042198 0.52588564 0.53078789 0.53776430 0.57123557 0
C50_samsung_DE 0.074867518 0.08393829 0.08938664 0.09092923 0.09499727 0.11578560 0
C50_samsung_COR 0.273053308 0.29479059 0.30462978 0.30328561 0.31523558 0.32286941 0
C50_samsung_NZV 0.470230027 0.48540468 0.49807960 0.49684250 0.50849985 0.52419389 0
C50_samsung_RFE 0.494873470 0.51180857 0.52080839 0.52063059 0.52584749 0.56390783 0
SVM_samsung 0.329040440 0.35344724 0.36528667 0.36620641 0.37678172 0.39881421 0
SVM_samsung_DE 0.080310439 0.08509425 0.09485042 0.09388744 0.10190785 0.10888411 0
SVM_samsung_COR 0.245610109 0.26445854 0.26865324 0.27064970 0.27643416 0.30034144 0
SVM_samsung_NZV 0.294191474 0.30375774 0.31298348 0.31420041 0.32086661 0.33580192 0
SVM_samsung_RFE 0.310797394 0.33425984 0.33770745 0.33781919 0.34123333 0.37426061 0
kknn_samsung 0.465362960 0.48358292 0.49035445 0.49153031 0.49680220 0.51964512 0
kknn_samsung_DE 0.002185782 0.01117107 0.01296818 0.02125110 0.01548823 0.09799522 0
kknn_samsung_COR 0.084024358 0.23201955 0.27707846 0.25561741 0.31269379 0.32855773 0
kknn_samsung_NZV 0.195501328 0.40746971 0.44024950 0.40904987 0.48054266 0.50679814 0
kknn_samsung_RFE 0.235076982 0.35572479 0.48181726 0.42178075 0.49190294 0.50327961 0
##Large Matrices
# Load the large matrices that are to be scored.
dfLM_iphone  <- import("LargeMatrix_iphone.csv")
dfLM_samsung <- import("LargeMatrix_samsung.csv")
# iPhone: restrict the large matrix to the RFE-selected predictors and carry
# its sentiment column along.
dfLM_iphoneRFE <- dfLM_iphone[, predictors(rfeResults_iphone)]
dfLM_iphoneRFE$iphonesentiment <- dfLM_iphone$iphonesentiment
# Samsung: no feature selection, the full large matrix is scored.
# Score with the random forest fitted on RFE features (iPhone) and the C5.0
# model fitted on the original data (Samsung).
LM_Pred_iphone <- predict(
  RF_iphone_RFE,
  newdata = dfLM_iphoneRFE
)
LM_Pred_Samsung <- predict(
  C50_samsung_Original,
  newdata = dfLM_samsung
)
summary(LM_Pred_iphone)
0 1 2 3 4 5
44725 0 3029 2071 2 15021
summary(LM_Pred_Samsung)
0 1 2 3 4 5
39022 0 4482 2301 84 18959
summary(df_iphone$iphonesentiment)
0 1 2 3 4 5
1962 390 454 1188 1439 7540
summary(df_samsung$galaxysentiment)
0 1 2 3 4 5
1696 382 450 1175 1417 7791